Data load

DREAM model

#L1 = getContrast( vobjDream, form, merged_meta_mono, c("treatmentLPS", "treatmentBasal"))
#L2 = getContrast( vobjDream, form, merged_meta_mono, c("treatmentIFNg", "treatmentBasal"))

#L = cbind(L1, L2)
#fit = dream(vobjDream, form, merged_meta_mono, L)
#res_LPS_mono <- data.frame(topTable(fit, coef='L1', number=nrow(genes_counts_filt1), sort.by = "p"), check.names = F)
#res_IFNy_mono <- data.frame(topTable(fit, coef='L2', number=nrow(genes_counts_filt1), sort.by = "p"), check.names = F)
# Two-column lookup table (Ensembl gene ID, gene symbol) read from the
# gencode v30 mapping file; used to attach symbols to result tables.
gencode_30 <- read.table(file = "~/Documents/MiGASti/Databases/ens.geneid.gencode.v30")
names(gencode_30) <- c("ensembl", "symbol")
#res_LPS_mono <- tibble::rownames_to_column(res_LPS_mono, "ensembl")
#res_LPS_monocytes <- merge(res_LPS_mono, gencode_30, by = "ensembl")
#res_IFNy_mono <- tibble::rownames_to_column(res_IFNy_mono, "ensembl")
#res_IFNy_monocytes <- merge(res_IFNy_mono, gencode_30, by = "ensembl")
#save(res_LPS_monocytes, file = "res_LPS_monocytes.Rdata")
#save(res_IFNy_monocytes, file = "res_IFNy_monocytes.Rdata")

LPS

DE genes 15% LPS

# Load the saved monocyte LPS results and count the genes with FDR < 15%.
load(file = "~/Documents/MiGASti/Databases/res_LPS_monocytes.Rdata")
sign_LPS <- subset(res_LPS_monocytes, subset = adj.P.Val < 0.15)
nrow(sign_LPS)
## [1] 11205

DE genes 10% LPS

# Number of monocyte LPS genes with FDR < 10%.
sign_LPS <- subset(res_LPS_monocytes, subset = adj.P.Val < 0.10)
nrow(sign_LPS)
## [1] 10841

DE genes 5% LPS

# Number of monocyte LPS genes with FDR < 5%.
sign_LPS <- subset(res_LPS_monocytes, subset = adj.P.Val < 0.05)
nrow(sign_LPS)
## [1] 10219

FDR distribution

# Density of the P.Value column of the monocyte LPS results.
# NOTE(review): the title says "FDR" but the plotted column is the raw
# P.Value, not adj.P.Val -- confirm which one was intended.
res <- res_LPS_monocytes
p <- ggplot(data = res, mapping = aes(x = P.Value))
p +
  geom_density(colour = "darkblue", fill = "lightblue") +
  theme_classic() +
  labs(title = "FDR Distribution")

Fold change distribution

# Density of log fold changes for the result table currently held in `res`.
p <- ggplot(data = res, mapping = aes(x = logFC))
p +
  geom_density(colour = "darkblue", fill = "lightblue") +
  theme_classic() +
  labs(title = "Fold Change Distribution")

MA plot

# MA plot for the result table in `res`: average expression (AveExpr) on x,
# log fold change on y, points coloured by whether adj.P.Val < 0.05.
plot.data <- res
plot.data$id <- rownames(plot.data)
data <- data.frame(plot.data)
# From here on P.Value holds -log10(p); the volcano plot below reads it as-is.
data$P.Value <- -log10(data$P.Value)
# Significance flag: "1" when adj.P.Val < 0.05, otherwise "0". The column is
# called `fifteen` although the cutoff is 5%; the name is kept because the
# volcano plot maps colour to it. Levels are fixed explicitly so they do not
# depend on which classes happen to occur in the data.
data$fifteen <- factor(as.integer(data$adj.P.Val < 0.05), levels = c(0L, 1L))
ma <- ggplot(data, aes(AveExpr, logFC, color = fifteen))
ma + geom_point() +
  # Colours and legend labels are matched to factor levels by name, so a table
  # with only one class present can never be mislabelled.
  scale_color_manual(
    values = c("0" = "black", "1" = "red"),
    labels = c("0" = "> 0.05", "1" = "< 0.05")
  ) +
  # Legend title matches the volcano plot ("FDR") instead of the placeholder
  # "labels".
  labs(title = "MA plot", color = "FDR") +
  theme_classic()

#theme(plot.title = element_text(hjust = 0.5)) + ylim (-10,10) + xlim(-4,22)

Volcano plot

# Volcano plot: log fold change against P.Value (already converted to
# -log10 in `data`), coloured by the FDR < 0.05 flag. The axis limits drop
# points outside logFC [-5, 5] and y [0, 30], which triggers the
# "Removed ... rows" warning.
vp <- ggplot(data = data, mapping = aes(x = logFC, y = P.Value, colour = fifteen))
vp +
  geom_point() +
  scale_color_manual(values = c("black", "red"), labels = c("> 0.05", "< 0.05")) +
  theme_classic() +
  labs(title = "Gene Level Volcano Plot", color = "FDR", y = "-log10 pvalue") +
  xlim(-5, 5) +
  ylim(0, 30)
## Warning: Removed 269 rows containing missing values (geom_point).

# Restore the objects saved in res_name_LPS2.Rdata (presumably the microglia
# LPS table `res_name_LPS2` used by the overlap sections -- confirm).
load(file = "~/Documents/MiGASti/docs/res_name_LPS2.Rdata")

Data table for download

# Keep the identifier and statistics columns of the monocyte LPS results and
# render them with createDT().
res_LPS_diff_top <- res_LPS_monocytes[
  ,
  c("ensembl", "symbol", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "z.std")
]
createDT(res_LPS_diff_top)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

IFNy

DE genes 15% IFNy

# Load the saved monocyte IFNy results and count the genes with FDR < 15%.
# The filter must run on res_IFNy_monocytes: it previously used
# res_LPS_monocytes, so this IFNy section reported the LPS count (the recorded
# 11205 is identical to the LPS 15% figure). Re-render to refresh the output.
load("~/Documents/MiGASti/Databases/res_IFNy_monocytes.Rdata")
sign_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.15)
nrow(sign_IFNy)
## [1] 11205

DE genes 10% IFNy

# Number of monocyte IFNy genes with FDR < 10%.
sign_IFNy <- subset(res_IFNy_monocytes, subset = adj.P.Val < 0.10)
nrow(sign_IFNy)
## [1] 8441

DE genes 5% IFNy

# Number of monocyte IFNy genes with FDR < 5%.
sign_IFNy <- subset(res_IFNy_monocytes, subset = adj.P.Val < 0.05)
nrow(sign_IFNy)
## [1] 7604

FDR distribution

# Density of the P.Value column of the monocyte IFNy results.
# NOTE(review): the title says "FDR" but the plotted column is the raw
# P.Value, not adj.P.Val -- confirm which one was intended.
res <- res_IFNy_monocytes
p <- ggplot(data = res, mapping = aes(x = P.Value))
p +
  geom_density(colour = "darkblue", fill = "lightblue") +
  theme_classic() +
  labs(title = "FDR Distribution")

Fold change distribution

# Density of log fold changes for the result table currently held in `res`.
p <- ggplot(data = res, mapping = aes(x = logFC))
p +
  geom_density(colour = "darkblue", fill = "lightblue") +
  theme_classic() +
  labs(title = "Fold Change Distribution")

MA plot

# MA plot for the result table in `res`: average expression (AveExpr) on x,
# log fold change on y, points coloured by whether adj.P.Val < 0.05.
plot.data <- res
plot.data$id <- rownames(plot.data)
data <- data.frame(plot.data)
# From here on P.Value holds -log10(p); the volcano plot below reads it as-is.
data$P.Value <- -log10(data$P.Value)
# Significance flag: "1" when adj.P.Val < 0.05, otherwise "0". The column is
# called `fifteen` although the cutoff is 5%; the name is kept because the
# volcano plot maps colour to it. Levels are fixed explicitly so they do not
# depend on which classes happen to occur in the data.
data$fifteen <- factor(as.integer(data$adj.P.Val < 0.05), levels = c(0L, 1L))
ma <- ggplot(data, aes(AveExpr, logFC, color = fifteen))
ma + geom_point() +
  # Colours and legend labels are matched to factor levels by name, so a table
  # with only one class present can never be mislabelled.
  scale_color_manual(
    values = c("0" = "black", "1" = "red"),
    labels = c("0" = "> 0.05", "1" = "< 0.05")
  ) +
  # Legend title matches the volcano plot ("FDR") instead of the placeholder
  # "labels".
  labs(title = "MA plot", color = "FDR") +
  theme_classic()

#theme(plot.title = element_text(hjust = 0.5)) + ylim (-10,10) + xlim(-4,22)

Volcano plot

# Volcano plot: log fold change against P.Value (already converted to
# -log10 in `data`), coloured by the FDR < 0.05 flag. The axis limits drop
# points outside logFC [-5, 5] and y [0, 30], which triggers the
# "Removed ... rows" warning.
vp <- ggplot(data = data, mapping = aes(x = logFC, y = P.Value, colour = fifteen))
vp +
  geom_point() +
  scale_color_manual(values = c("black", "red"), labels = c("> 0.05", "< 0.05")) +
  theme_classic() +
  labs(title = "Gene Level Volcano Plot", color = "FDR", y = "-log10 pvalue") +
  xlim(-5, 5) +
  ylim(0, 30)
## Warning: Removed 133 rows containing missing values (geom_point).

# Restore the objects saved in res_name_LPS2.Rdata.
# NOTE(review): the same file is already loaded in the LPS section, and this
# chunk sits in the IFNy section -- confirm whether the load is needed here
# or whether the IFNy microglia file was intended.
load(file = "~/Documents/MiGASti/docs/res_name_LPS2.Rdata")

Data table for download

# Keep the identifier and statistics columns of the monocyte IFNy results and
# render them with createDT().
res_IFNy_diff_top <- res_IFNy_monocytes[
  ,
  c("ensembl", "symbol", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "z.std")
]
createDT(res_IFNy_diff_top)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

Overlap with significant microglia LPS genes FDR 5%

# FDR < 5% genes under LPS in microglia (res_name_LPS2) and in monocytes.
# NOTE(review): the other chunks load res_name_LPS2.Rdata from
# "~/Documents/MiGASti/docs/"; confirm which copy is the intended one.
load(file = "~/Documents/MiGASti/res_name_LPS2.Rdata")
sign_microglia_LPS <- subset(res_name_LPS2, subset = adj.P.Val < 0.05)
sign_monocytes_LPS <- subset(res_LPS_monocytes, subset = adj.P.Val < 0.05)

# Genes significant in both cell types, joined on gene symbol.
overlap <- merge(x = sign_microglia_LPS, y = sign_monocytes_LPS, by = "symbol")

# Symbol vectors passed to setEnrichment().
set1 <- sign_microglia_LPS
set2 <- sign_monocytes_LPS
set1_v <- set1[["symbol"]]
set2_v <- set2[["symbol"]]

#' Test whether two gene sets overlap more than expected by chance.
#'
#' Builds the 2x2 table (in both / set1 only / set2 only / in neither) and runs
#' a one-sided Fisher's exact test for enrichment.
#'
#' @param set1 Vector of gene identifiers (e.g. symbols).
#' @param set2 Vector of gene identifiers to compare against `set1`.
#' @param universe Size of the background gene population. Defaults to 20000.
#' @return A one-row tibble with `set1_length`, `set2_length`, `overlap`,
#'   `ratio` (odds ratio estimated by `fisher.test`) and `p.value`. Lengths
#'   and overlap are counted on distinct, non-missing identifiers.
setEnrichment <- function(set1, set2, universe = 20000){

  # Treat the inputs as sets: a missing or repeated symbol must not be counted
  # more than once in the overlap or in the set sizes.
  set1 <- unique(set1[!is.na(set1)])
  set2 <- unique(set2[!is.na(set2)])

  n_both <- sum(set1 %in% set2)
  n_only_set1 <- length(set1) - n_both
  n_only_set2 <- length(set2) - n_both
  n_neither <- universe - length(set2) - n_only_set1

  # A universe smaller than the union of the sets gives a negative cell;
  # fail with an explicit message instead of fisher.test's generic error.
  if (n_neither < 0) {
    stop(
      "`universe` (", universe, ") is smaller than the union of set1 and set2 (",
      n_both + n_only_set1 + n_only_set2, ")",
      call. = FALSE
    )
  }

  contingency_table <- matrix(
    c(n_both, n_only_set1, n_only_set2, n_neither),
    nrow = 2
  )

  # one-tailed test for enrichment only
  fisher_results <- fisher.test(contingency_table, alternative = "greater")

  tibble::tibble(
    set1_length = length(set1),
    set2_length = length(set2),
    overlap = n_both,
    ratio = fisher_results$estimate,
    p.value = fisher_results$p.value
  )
}

# Enrichment of the microglia/monocyte overlap against the default universe.
setEnrichment(set1_v, set2_v)

Overlap with significant microglia IFNy genes FDR 5%

# FDR < 5% genes under IFNy in microglia (res_name_IFNy2) and in monocytes.
load("~/Documents/MiGASti/docs/res_name_IFNy2.Rdata")
sign_microglia_IFNy <- subset(res_name_IFNy2, adj.P.Val < 0.05)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)

# Genes significant in both cell types, joined on gene symbol.
overlap <- merge(sign_microglia_IFNy, sign_monocytes_IFNy, by = "symbol")

set1 <- sign_microglia_IFNy
set2 <- sign_monocytes_IFNy
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC > 1 microglia and monocytes > 5

# LPS, FDR < 5%: microglia genes with logFC > 1 vs monocyte genes with
# logFC > 5.
sign_microglia_LPS <- subset(res_name_LPS2, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC > 1)
sign_monocytes_LPS <- subset(res_LPS_monocytes, adj.P.Val < 0.05)
sign_monocytes_LPS_LF <- subset(sign_monocytes_LPS, logFC > 5)

# Genes in both filtered tables. The merge must pair microglia with
# monocytes; it previously joined the monocyte table with itself.
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_LPS_LF, by = "symbol")

set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_LPS_LF
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC < -1 microglia and monocytes < -5

# LPS, FDR < 5%: microglia genes with logFC < -1 vs monocyte genes with
# logFC < -5.
sign_microglia_LPS <- subset(res_name_LPS2, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC < -1)
sign_monocytes_LPS <- subset(res_LPS_monocytes, adj.P.Val < 0.05)
sign_monocytes_LPS_LF <- subset(sign_monocytes_LPS, logFC < -5)

# Genes in both filtered tables. The merge must pair microglia with
# monocytes; it previously joined the monocyte table with itself.
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_LPS_LF, by = "symbol")

set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_LPS_LF
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia IFNy genes FDR 5% log FC > 1 microglia and monocytes > 5

# IFNy, FDR < 5%: microglia genes with logFC > 1 vs monocyte genes with
# logFC > 5.
sign_microglia_IFNy <- subset(res_name_IFNy2, adj.P.Val < 0.05)
sign_microglia_IFNy_LF <- subset(sign_microglia_IFNy, logFC > 1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC > 5)

# Genes in both filtered tables. The merge must pair microglia with
# monocytes; it previously joined the monocyte table with itself.
overlap <- merge(sign_microglia_IFNy_LF, sign_monocytes_IFNy_LF, by = "symbol")

set1 <- sign_microglia_IFNy_LF
set2 <- sign_monocytes_IFNy_LF
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia IFNy genes FDR 5% log FC < -1 microglia and monocytes < -5

# IFNy, FDR < 5%: microglia genes with logFC < -1 vs monocyte genes with
# logFC < -5.
sign_microglia_IFNy <- subset(res_name_IFNy2, adj.P.Val < 0.05)
sign_microglia_IFNy_LF <- subset(sign_microglia_IFNy, logFC < -1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC < -5)

# Genes in both filtered tables. The merge must pair microglia with
# monocytes; it previously joined the monocyte table with itself.
overlap <- merge(sign_microglia_IFNy_LF, sign_monocytes_IFNy_LF, by = "symbol")

set1 <- sign_microglia_IFNy_LF
set2 <- sign_monocytes_IFNy_LF
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC > 1 + IFNy monocytes > 5

# FDR < 5%: microglia LPS genes with logFC > 1 vs monocyte IFNy genes with
# logFC > 5.
sign_microglia_LPS <- subset(res_name_LPS2, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC > 1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC > 5)

# Genes present in both filtered tables, joined on gene symbol.
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_IFNy_LF, by = "symbol")

set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_IFNy_LF
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC > 1 + IFNy monocytes < -5

# FDR < 5%: microglia LPS genes with logFC > 1 vs monocyte IFNy genes with
# logFC < -5.
sign_microglia_LPS <- subset(res_name_LPS2, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC > 1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC < -5)

# Genes present in both filtered tables, joined on gene symbol.
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_IFNy_LF, by = "symbol")

set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_IFNy_LF
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia IFNy genes FDR 5% log FC > 1 + LPS monocytes < -5

# FDR < 5%: microglia IFNy genes with logFC > 1 vs monocyte LPS genes with
# logFC < -5.
sign_microglia_IFNy <- subset(res_name_IFNy2, adj.P.Val < 0.05)
sign_microglia_IFNy_LF <- subset(sign_microglia_IFNy, logFC > 1)
sign_monocytes_LPS <- subset(res_LPS_monocytes, adj.P.Val < 0.05)
sign_monocytes_LPS_LF <- subset(sign_monocytes_LPS, logFC < -5)

# Genes present in both filtered tables, joined on gene symbol.
overlap <- merge(sign_microglia_IFNy_LF, sign_monocytes_LPS_LF, by = "symbol")

set1 <- sign_microglia_IFNy_LF
set2 <- sign_monocytes_LPS_LF
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC > 1 + IFNy monocytes < -5

# FDR < 5%: microglia LPS genes with logFC > 1 vs monocyte IFNy genes with
# logFC < -5.
# NOTE(review): same filters as an earlier section of this document; confirm
# whether this repeat is intentional.
sign_microglia_LPS <- subset(res_name_LPS2, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC > 1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC < -5)

# Genes in both filtered tables. The merge must use the LPS microglia subset
# built above; it previously used sign_microglia_IFNy_LF, a leftover from an
# earlier section.
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_IFNy_LF, by = "symbol")

set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_IFNy_LF
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC > 1 + LPS monocytes < -5

# FDR < 5%: microglia LPS genes with logFC > 1 vs monocyte LPS genes with
# logFC < -5.
sign_microglia_LPS <- subset(res_name_LPS2, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC > 1)
sign_monocytes_LPS <- subset(res_LPS_monocytes, adj.P.Val < 0.05)
sign_monocytes_LPS_LF <- subset(sign_monocytes_LPS, logFC < -5)

# Genes present in both filtered tables, joined on gene symbol.
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_LPS_LF, by = "symbol")

set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_LPS_LF
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC < - 1 + LPS monocytes > 5

# FDR < 5%: microglia LPS genes with logFC < -1 vs monocyte LPS genes with
# logFC > 5.
sign_microglia_LPS <- subset(res_name_LPS2, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC < -1)
sign_monocytes_LPS <- subset(res_LPS_monocytes, adj.P.Val < 0.05)
sign_monocytes_LPS_LF <- subset(sign_monocytes_LPS, logFC > 5)

# Genes present in both filtered tables, joined on gene symbol.
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_LPS_LF, by = "symbol")

set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_LPS_LF
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia IFNy genes FDR 5% log FC > 1 + IFNy monocytes < -5

# FDR < 5%: microglia IFNy genes with logFC > 1 vs monocyte IFNy genes with
# logFC < -5.
sign_microglia_IFNy <- subset(res_name_IFNy2, adj.P.Val < 0.05)
sign_microglia_IFNy_LF <- subset(sign_microglia_IFNy, logFC > 1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC < -5)

# Genes present in both filtered tables, joined on gene symbol.
overlap <- merge(sign_microglia_IFNy_LF, sign_monocytes_IFNy_LF, by = "symbol")

set1 <- sign_microglia_IFNy_LF
set2 <- sign_monocytes_IFNy_LF
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC < - 1 + IFNy monocytes > 5

# FDR < 5%: microglia LPS genes with logFC < -1 vs monocyte IFNy genes with
# logFC > 5.
sign_microglia_LPS <- subset(res_name_LPS2, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC < -1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC > 5)

# Genes present in both filtered tables, joined on gene symbol.
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_IFNy_LF, by = "symbol")

set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_IFNy_LF
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Overlap with significant microglia LPS genes FDR 5% log FC < - 1 + IFNy monocytes < - 5

# FDR < 5%: microglia LPS genes with logFC < -1 vs monocyte IFNy genes with
# logFC < -5.
sign_microglia_LPS <- subset(res_name_LPS2, adj.P.Val < 0.05)
sign_microglia_LPS_LF <- subset(sign_microglia_LPS, logFC < -1)
sign_monocytes_IFNy <- subset(res_IFNy_monocytes, adj.P.Val < 0.05)
sign_monocytes_IFNy_LF <- subset(sign_monocytes_IFNy, logFC < -5)

# Genes present in both filtered tables, joined on gene symbol.
overlap <- merge(sign_microglia_LPS_LF, sign_monocytes_IFNy_LF, by = "symbol")

set1 <- sign_microglia_LPS_LF
set2 <- sign_monocytes_IFNy_LF
set1_v <- set1$symbol
set2_v <- set2$symbol

# setEnrichment() is reused from the first overlap section (microglia vs
# monocytes LPS, FDR 5%) rather than redefined here with an identical body.
setEnrichment(set1 = set1_v, set2 = set2_v)

Scatterplot LogFC microglia vs monocytes LPS

#12812 genes

# Pair the microglia and monocyte LPS results gene by gene (join on symbol).
# Both tables carry a logFC column, so merge() suffixes them: .x for the first
# argument (microglia) and .y for the second (monocytes).
genes <- merge(res_name_LPS2, res_LPS_monocytes, by = "symbol")

# Select the fold changes by name rather than by column position (3 and 10),
# which would silently pick the wrong column if either table's layout changed.
microglia <- genes[["logFC.x"]]
monocytes <- genes[["logFC.y"]]
df <- data.frame(microglia, monocytes)

# 2D-binned scatter of the two logFC vectors with a linear fit, the identity
# line (dotted) and the Spearman correlation annotated by stat_cor().
p <- ggplot(df, aes(x = microglia, y = monocytes)) +
  geom_smooth(method = "lm", se = FALSE, color = "black", formula = y ~ x) +
  geom_bin2d(bins = 100) +
  scale_fill_continuous(type = "viridis") +
  geom_abline(slope = 1, intercept = 0, linetype = 3) +
  theme_bw()
p + stat_cor(method = "spearman")

Scatterplot LogFC microglia vs monocytes IFNy

#12812 genes

# Pair the microglia and monocyte IFNy results gene by gene (join on symbol).
# Both tables carry a logFC column, so merge() suffixes them: .x for the first
# argument (microglia) and .y for the second (monocytes).
genes <- merge(res_name_IFNy2, res_IFNy_monocytes, by = "symbol")

# Select the fold changes by name rather than by column position (3 and 10),
# which would silently pick the wrong column if either table's layout changed.
microglia <- genes[["logFC.x"]]
monocytes <- genes[["logFC.y"]]
df <- data.frame(microglia, monocytes)

# 2D-binned scatter of the two logFC vectors with a linear fit, the identity
# line (dotted) and the Spearman correlation annotated by stat_cor().
p <- ggplot(df, aes(x = microglia, y = monocytes)) +
  geom_smooth(method = "lm", se = FALSE, color = "black", formula = y ~ x) +
  geom_bin2d(bins = 100) +
  scale_fill_continuous(type = "viridis") +
  geom_abline(slope = 1, intercept = 0, linetype = 3) +
  theme_bw()
p + stat_cor(method = "spearman")